6 - Validacion de EcuacioneS KFRE Recalibradas en datos imputados

Author

Percy Soto Becerra

1 Code to reproduce results of the manuscript ‘Kidney Failure Prediction: Multicenter External Validation of the KFRE Model in Patients with CKD Stages 3-4 in Peru’

1.1 Introduction

This document presents the code and results of the main analysis shown in the article.

1.2 Setup

rm(list = ls())

# Use pacman to check whether packages are installed, if not load
if (!require("pacman")) install.packages("pacman")
library(pacman)

# Unload all package to begin in a session with only base packages
pacman::p_unload("all")

# Install packages
pacman::p_load(
  here, 
  skimr, 
  survival,
  survminer, 
  rms,
  cmprsk,
  riskRegression,
  mstate,
  pseudo,
  pec,
  plotrix,
  knitr,
  splines,
  kableExtra,
  flextable,
  gtsummary,
  boot,
  tidyverse,
  rsample,
  gridExtra,
  webshot, 
  patchwork,
  survival, 
  ggsci, 
  cowplot, 
  scales, 
  patchwork, 
  labelled, 
  glue, 
  dcurves, 
  broom, 
  downlit, 
  xml2, 
  gghalves, 
  devtools, 
  htmltools, 
  gghalves, 
  ggtext, 
  DiagrammeR, 
  gt, 
  janitor, 
  VIM, 
  PerformanceAnalytics, 
  mice, 
  rms, 
  naniar, 
  DescTools, 
  gtools, 
  ggExtra, 
  furrr, 
  future, 
  ggmice,
  parallel,
  tictoc, 
  rio,
  tidymodels
)

if (!require("impstep")) remotes::install_github("bgravesteijn/impstep", force = TRUE)
if (!require("smplot2")) devtools::install_github('smin95/smplot2', force = TRUE)

library(impstep)

1.2.0.1 Funciones

source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))    
source(here("Code", "source", "tidy_pool.R")) 
source(here("Code", "source", "process_imp_cal_plot2.R")) 
source(here("Code", "source", "print_equation.R")) 

1.2.1 Importar datos

Las ecuaciones originales se muestran a continuacion:

eq_kfre_original2y <- data.frame(
  vars = c("age", "male", "eGFR", "logACR"),
  coefs = c(-0.2201, 0.2467, -0.5567, 0.4510),
  scale = c(10, 1, 5, 1),
  center = c(7.036, 0.5642, 7.222, 5.137)
)

st02y <- 0.9832


eq_kfre_original5y <- data.frame(
  vars = c("age", "male", "eGFR", "logACR"),
  coefs = c(-0.2201, 0.2467, -0.5567, 0.4510),
  scale = c(10, 1, 5, 1),
  center = c(7.036, 0.5642, 7.222, 5.137)
)
st05y <- 0.9365
  • A 2 anios:
print_equation(eq_kfre_original2y, st02y)

\[1 - 0.9832^{exp(- 0.2201 \times (age / 10 - 7.036) + 0.2467 \times (male / 1 - 0.5642) - 0.5567 \times (eGFR / 5 - 7.222) + 0.451 \times (logACR / 1 - 5.137))}\]

  • A 5 anios:
print_equation(eq_kfre_original2y, st02y)

\[1 - 0.9832^{exp(- 0.2201 \times (age / 10 - 7.036) + 0.2467 \times (male / 1 - 0.5642) - 0.5567 \times (eGFR / 5 - 7.222) + 0.451 \times (logACR / 1 - 5.137))}\]

Las nuevas sobrevidas basales y los factores de recalibracion de los coeficientes de las ecuaciones recalibradas, dependiendo del metodo usado, se muestran a continuacion

recal_loads <- import(here("Data", "Tidy", "equations", "recal_loads.rds")) 
recal_loads |> 
  kbl() |> 
  kable_classic()
metodo year sobrev_recal fc_coef
A 2 0.9766676 1.0000000
A 5 0.9496968 1.0000000
B 2 0.9624290 0.7405622
B 5 0.9241750 0.7405622
C 2 0.9772796 1.0000000
C 5 0.9537293 1.0000000
D 2 0.9639901 0.7405622
D 5 0.9328086 0.7405622

1.3 Configurar cores para parallel processing

n_cores <- detectCores()

# Evaluate futures in parallel - max of two workers to avoid hogging resources
future::plan("multisession", workers = n_cores)

1.4 Set some constants

seed <- 2014
primary_event <- 1
imputs <- 4

1.5 Importar datos

data_impA <- readRDS(here::here("Data", "Tidy", "data_impA.rds")) 

imp.datosA <- complete(data_impA, action = "long", include = TRUE) |> 
  filter(.imp != 0) |> 
  filter(.imp < imputs)

imp.datosA2 <- imp.datosA 

1.6 Metodo A: Reestimar riesgo basal usando Cox

source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))    
source(here("Code", "source", "tidy_pool.R")) 
source(here("Code", "source", "process_imp_cal_plot2.R")) 
source(here("Code", "source", "print_equation.R")) 
df_recal_metA <- import(here("Data", "Tidy", "equations", "df_recal_modA.rds"))

df_recal_metA2y <- df_recal_metA |> 
  filter(year == 2) |> 
  select(-year) |> 
  rename(st0_imp2y = st0_imp, 
         fc_coef_imp2y = fc_coef_imp)

df_recal_metA5y <- df_recal_metA |> 
  filter(year == 5) |> 
  select(-year) |> 
  rename(st0_imp5y = st0_imp, 
         fc_coef_imp5y = fc_coef_imp)

rm(df_recal_metA)

imp.datosA <- imp.datosA2 |> 
  left_join(df_recal_metA2y, by = ".imp") |> 
  left_join(df_recal_metA5y, by = ".imp") |> 
  mutate(eventdf = factor(eventd), 
        risk2y = 1 - st0_imp2y ^ exp(fc_coef_imp2y * kfre_pi(imp.datosA2)), 
        risk5y = 1 - st0_imp5y ^ exp(fc_coef_imp5y * kfre_pi(imp.datosA2))) |> 
  select(.imp, .id, time, eventd, eventdf, risk2y, risk5y)

rm(df_recal_metA2y, df_recal_metA5y)
head(imp.datosA)

1.6.1 Calibration and Discrimination Measures

future::plan("multisession", workers = n_cores)
results_stack3a4_2y <- tidy_performance_stack(imp.datosA, 
                                  horizon = 2, 
                                  primary_event = 1, 
                                  pred = "risk2y",
                                  seed = seed, 
                                  n_cores = n_cores)

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  3765407 201.1    7207984 385.0   7207984  385.0
Vcells 19995951 152.6  119984267 915.5 187462211 1430.3
rio::export(results_stack3a4_2y , here("Data", "Tidy", 
                                       "res_valext_kfre_stack3a4_2y_metA.rds"))

future::plan("multisession", workers = n_cores)
results_stack3a4_5y <- tidy_performance_stack(imp.datosA, 
                                  horizon = 5, 
                                  primary_event = 1, 
                                  pred = "risk5y",
                                  seed = seed, 
                                  n_cores = n_cores)

rio::export(results_stack3a4_5y, here("Data", "Tidy", 
                                      "res_valext_kfre_stack3a4_5y_metA.rds"))

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  3771028 201.4    7207984 385.0   7207984  385.0
Vcells 20009221 152.7   95987414 732.4 187462211 1430.3
res_pool1 <- tidy_pool(results_stack3a4_2y) 

res_pool1 |> 
  kbl() |> 
  kable_classic(full_width = T, html_font = "Cambria")
term estimate ll ul pval ubar b t dfcom df riv lambda fmi n
Average predicted risk 0.0253658 NaN NaN NA NaN 0.0000000 NaN 30030 NA NaN NaN NaN 30031
Overall observerd risk 0.0273046 0.0254293 0.0291799 0.0000000 0.0000009 0.0000000 0.0000009 30030 30030.000000 0.0000000 0.0000000 0.0000666 30031
Log OE ratio 0.0736678 0.0030289 0.1443067 0.0409762 0.0012279 0.0000501 0.0012947 30030 731.850759 0.0544075 0.0516001 0.0026514 30031
OE difference 0.0019387 0.0000178 0.0038597 0.0479236 0.0000009 0.0000000 0.0000010 30030 968.279532 0.0467635 0.0446744 0.0020131 30031
Calibration Intercept -0.4082709 -0.5791688 -0.2373730 0.0002737 0.0034439 0.0019309 0.0060185 30030 10.922498 0.7475660 0.4277755 0.1129269 30031
Calibration Slope 0.6270993 0.5449601 0.7092384 0.0000005 0.0005521 0.0004844 0.0011979 30029 6.876937 1.1699029 0.5391499 0.1479052 30031
Logit AUC 2.2297884 2.0794618 2.3801149 0.0000000 0.0035248 0.0012409 0.0051792 30030 19.580638 0.4693847 0.3194430 0.0744247 30031
Brier 0.0238787 0.0220470 0.0257104 0.0000000 0.0000006 0.0000002 0.0000008 30030 25.964741 0.3838384 0.2773723 0.0594733 30031
res_pool2 <- tidy_pool(results_stack3a4_5y) 

res_pool2 |> 
  kbl() |> 
  kable_classic(full_width = T, html_font = "Cambria")
term estimate ll ul pval ubar b t dfcom df riv lambda fmi n
Average predicted risk 0.0454225 NaN NaN NA NaN 1.00e-07 NaN 30030 NA NaN NaN NaN 30031
Overall observerd risk 0.0476187 0.0450853 0.0501521 0.0000000 0.0000017 0.00e+00 0.0000017 30030 30030.00000 0.0000000 0.0000000 0.0000666 30031
Log OE ratio 0.0472319 -0.0079678 0.1024316 0.0933453 0.0007368 3.91e-05 0.0007889 30030 449.99877 0.0708125 0.0661297 0.0042690 30031
OE difference 0.0021962 -0.0004230 0.0048155 0.1001135 0.0000017 1.00e-07 0.0000018 30030 539.07942 0.0641981 0.0603254 0.0035782 30031
Calibration Intercept -0.3416117 -0.4298735 -0.2533500 0.0000000 0.0019609 4.83e-05 0.0020253 30030 1855.22148 0.0328110 0.0317687 0.0010592 30031
Calibration Slope 0.6214536 0.5681692 0.6747380 0.0000000 0.0003198 1.91e-04 0.0005745 30029 10.16851 0.7964856 0.4433576 0.1182095 30031
Logit AUC 2.0378663 1.9444443 2.1312882 0.0000000 0.0022485 1.73e-05 0.0022715 30030 11756.41892 0.0102445 0.0101407 0.0001692 30031
Brier 0.0385925 0.0364634 0.0407215 0.0000000 0.0000010 1.00e-07 0.0000012 30030 106.62395 0.1583138 0.1366761 0.0169974 30031
tab_res_2y <- res_pool1 |> 
  select(term, estimate, ll, ul) |> 
  mutate(
    across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)), 
    across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1  + exp(.x)), .x)), 
    term = if_else(term == "Log OE ratio", "OE ratio", term), 
    term = if_else(term == "Logit AUC", "AUC", term), 
    across(c(estimate, ll, ul), ~ if_else(term %in% 
                                            c("Average predicted risk", 
                                              "Overall observerd risk", 
                                              "OE difference"), 100 * .x, .x)), 
    across(c(estimate, ll, ul), ~ round(.x, 2)), 
    measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"), 
                       term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
                       TRUE ~ str_glue("{estimate} ({ll} to {ul})")
                       )
    ) |> 
  select(term, measures) |> 
  mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"), 
         year = "2 years")

tab_res_5y <- res_pool2 |> 
  select(term, estimate, ll, ul) |> 
  mutate(
    across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)), 
    across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1  + exp(.x)), .x)), 
    term = if_else(term == "Log OE ratio", "OE ratio", term), 
    term = if_else(term == "Logit AUC", "AUC", term), 
    across(c(estimate, ll, ul), ~ if_else(term %in% 
                                            c("Average predicted risk", 
                                              "Overall observerd risk", 
                                              "OE difference"), 100 * .x, .x)), 
    across(c(estimate, ll, ul), ~ round(.x, 2)), 
    measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"), 
                       term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
                       TRUE ~ str_glue("{estimate} ({ll} to {ul})")
                       )
    ) |> 
  select(term, measures) |> 
  mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"), 
         year = "5 years")

tab_res0 <- tab_res_2y |>
  bind_rows(tab_res_5y)

tab_res0 |> 
  as_grouped_data(groups = "year") |> 
  as_grouped_data(groups = "grupo") |> 
  flextable::as_flextable(hide_grouplabel = TRUE) |> 
  autofit() |> 
  set_header_labels(
    year = "Time horizon", 
    term = "Performance measure", 
    measures = "Method A"
  ) |>  
  bold(j = 1) |> 
  set_caption("Table. Performance measures of KFRE in the external dataset of patients with CKD Stages 3a-4") |>  
  theme_booktabs() |>   
  bold(bold = TRUE, part = "header") -> table_perf_final

table_perf_final %>% 
  flextable::save_as_docx(path = here("Tables/Table_Imputed_Performance_metA.docx"))

table_perf_final

Time horizon

Performance measure

Method A

2 years

Calibration

Average predicted risk

2.54%

Overall observerd risk

2.73% (2.54% to 2.92%)

OE ratio

1.08 (1 to 1.16)

OE difference

0.19% (0% to 0.39%)

Calibration Intercept

-0.41 (-0.58 to -0.24)

Calibration Slope

0.63 (0.54 to 0.71)

Discrimination

AUC

0.9 (0.89 to 0.92)

Overall performance

Brier

0.02 (0.02 to 0.03)

5 years

Calibration

Average predicted risk

4.54%

Overall observerd risk

4.76% (4.51% to 5.02%)

OE ratio

1.05 (0.99 to 1.11)

OE difference

0.22% (-0.04% to 0.48%)

Calibration Intercept

-0.34 (-0.43 to -0.25)

Calibration Slope

0.62 (0.57 to 0.67)

Discrimination

AUC

0.88 (0.87 to 0.89)

Overall performance

Brier

0.04 (0.04 to 0.04)

rm(list=ls()[! ls() %in% c("imp.datosA", "imp.datosA2", "vdata", 
                           "primary_event", "horizon", 
                           "process_imp_cal_plot", "seed", "n_cores", "kfre_pi", 
                           "imputs")])
gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  3930326 210.0    7207984 385.0   7207984  385.0
Vcells 10209937  77.9   76789932 585.9 187462211 1430.3

1.6.2 Moderate calibration: Calibration curves lowess based on subdistributional hazards

primary_event <- 1

n_internal_knots <- 5

# Seleccion del grupo: Stages 3-4----

# A 2 años----
horizon <- 2

vdata <- imp.datosA %>% 
  rename(pred = risk2y) |> 
  select(.imp, .id, time, eventd, pred) |> 
  mutate(cll_pred = log(-log(1 - pred)))

rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <-  attr(rcs_vdata, "Boundary.knots")

pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))

vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))

future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
                         process_imp_cal_plot, 
                         vdata = vdata, 
                         primary_event = primary_event, 
                         horizon = horizon, 
                         type = "subdist_hazard", 
                         n_internal_knots = n_internal_knots, 
                         vdata_bis_pp, 
                         .options = furrr_options(seed = seed, 
                                                  packages = c("riskRegression", 
                                                               "survival", 
                                                               "splines", 
                                                               "cmprsk",
                                                               "tidyverse")), 
                         .progress = TRUE)
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |> 
  bind_rows() |> 
  filter(type == "observed")

subdist_df_stack <- subdist_df_imp_obs |>
  group_by(.imp) |> 
  mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |> 
  group_by(.imp, deciles_risk) |> 
  summarise(obs_mean_imp = mean(obs), 
            risk_mean_imp = mean(risk)) |> 
  group_by(deciles_risk) |> 
  summarise(obs_mean_pool = mean(obs_mean_imp), 
            risk_mean_pool = mean(risk_mean_imp))
  
subdist_df_imp_fix <- subdist_df_imp |> 
  bind_rows() |>  
  filter(type == "fixed") |> 
  arrange(risk) |> 
  summarise(obs_pool = mean(obs), 
            .by = risk) |> 
  mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))

rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_2y_metA.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_2y_metA.rds"))

# Grafico de calibracion
ggplot() +
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) + 
  geom_line(data = subdist_df_imp_fix, 
            aes(x = risk, y = obs_pool),
            alpha = 0.5, color = "black") +
  geom_point(data = subdist_df_stack,
             aes(x = risk_mean_pool, y = obs_mean_pool),
             shape = 23,
             stroke = 0.1,
             fill = "blue") + 
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_bw() + 
  labs(x = "Predicted risks", y = "Observed outcome proportions") + 
  # geom_rug(data = subdist_df_stack,
  #          aes(x = risk_mean_pool, y = obs_mean_pool),
  #          alpha = 0.1) +
  coord_fixed(ratio = 1, expand = TRUE)  -> plot_mod0_2y

# Grafico de calibracion con curvas imputadas
plot_mod0_2y + 
  geom_line(data = subdist_df_imp_obs, 
            aes(x = risk, y = obs, group = .imp),
            alpha = 0.1, color = "#38B8F7") -> plot_mod0_imp_2y

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4007596 214.1    7207984 385.0   7207984  385.0
Vcells 11142514  85.1   61431946 468.7 187462211 1430.3
# A 5 años----
horizon <- 5

vdata <- imp.datosA %>% 
  rename(pred = risk5y) |> 
  select(.imp, .id, time, eventd, pred) |> 
  mutate(cll_pred = log(-log(1 - pred))) 

rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <-  attr(rcs_vdata, "Boundary.knots")

pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))

vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))

future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
                         process_imp_cal_plot, 
                         vdata = vdata, 
                         primary_event = primary_event, 
                         horizon = horizon, 
                         type = "subdist_hazard", 
                         n_internal_knots = n_internal_knots, 
                         vdata_bis_pp, 
                         .options = furrr_options(seed = seed, 
                                                  packages = c("riskRegression", 
                                                               "survival", 
                                                               "splines", 
                                                               "cmprsk",
                                                               "tidyverse")), 
                         .progress = TRUE)
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |> 
  bind_rows() |> 
  filter(type == "observed")

subdist_df_stack <- subdist_df_imp_obs |>
  group_by(.imp) |> 
  mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |> 
  group_by(.imp, deciles_risk) |> 
  summarise(obs_mean_imp = mean(obs), 
            risk_mean_imp = mean(risk)) |> 
  group_by(deciles_risk) |> 
  summarise(obs_mean_pool = mean(obs_mean_imp), 
            risk_mean_pool = mean(risk_mean_imp))
  
subdist_df_imp_fix <- subdist_df_imp |> 
  bind_rows() |>  
  filter(type == "fixed") |> 
  arrange(risk) |> 
  summarise(obs_pool = mean(obs), 
            .by = risk) |> 
  mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))

rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_5y_metA.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_5y_metA.rds"))

# Grafico de calibracion
ggplot() +
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) + 
  geom_line(data = subdist_df_imp_fix, 
            aes(x = risk, y = obs_pool),
            alpha = 0.5, color = "black") +
  geom_point(data = subdist_df_stack,
             aes(x = risk_mean_pool, y = obs_mean_pool),
             shape = 23,
             stroke = 0.1,
             fill = "blue", 
             alpha = 0.5) + 
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_bw() + 
  labs(x = "Predicted risks", y = "Observed outcome proportions") + 
  # geom_rug(data = subdist_df_stack,
  #          aes(x = risk_mean_pool, y = obs_mean_pool),
  #          alpha = 0.1) +
  coord_fixed(ratio = 1, expand = TRUE)  -> plot_mod0_5y

# Grafico de calibracion con curvas imputadas
plot_mod0_5y + 
  geom_line(data = subdist_df_imp_obs, 
            aes(x = risk, y = obs, group = .imp),
            alpha = 0.3, color = "#38B8F7") -> plot_mod0_imp_5y

## Grafico final
plot_mod0_2y <- plot_mod0_2y + 
  labs(title = "Método A\n(2 year KFRE)") + 
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_5y <- plot_mod0_5y + 
  labs(title = "Método A\n(5 year KFRE)") + 
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_imp_2y <- plot_mod0_imp_2y +
  labs(title = "Método A\n(2 year KFRE)") +
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_imp_5y <- plot_mod0_imp_5y +
  labs(title = "Método A\n(5 year KFRE)") +
  theme(plot.title = element_text(hjust = 0.5))

plot_cal_mod0 <- plot_mod0_2y / plot_mod0_5y
plot_cal_imp_mod0 <- plot_mod0_imp_2y / plot_mod0_imp_5y


export(plot_mod0_2y, here("Data", "Tidy", "plot_metA_2y.rds"))
export(plot_mod0_5y, here("Data", "Tidy", "plot_metA_5y.rds"))
export(plot_cal_mod0, here("Data", "Tidy", "plot_cal_metA.rds"))

export(plot_mod0_imp_2y, here("Data", "Tidy", "plot_metA_imp_2y.rds"))
export(plot_mod0_imp_5y, here("Data", "Tidy", "plot_metA_imp_5y.rds"))
export(plot_cal_imp_mod0, here("Data", "Tidy", "plot_cal_imp_metA.rds"))


ggsave(filename = "Plot_Calibration_imputed_metA.png", 
       device = "png", 
       plot = plot_cal_mod0, 
       path = here("Figures"), 
       scale = 2, 
       width = 12, 
       height = 12,
       units = "cm", 
       dpi = 600)

ggsave(filename = "Plot_Calibration_imputed_estabilidad_metA.png", 
       device = "png", 
       plot = plot_cal_imp_mod0, 
       path = here("Figures"), 
       scale = 2, 
       width = 12, 
       height = 12,
       units = "cm", 
       dpi = 600)

gc()
           used  (Mb) gc trigger (Mb)  max used   (Mb)
Ncells  4082740 218.1    7207984  385   7207984  385.0
Vcells 11657615  89.0   49145557  375 187462211 1430.3
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_metA.png"))

Calibration curves for each group and prediction horizon
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_estabilidad_metA.png"))

Calibration curves for each group and prediction horizon

1.7 Metodo B: Reestimar coeficientes mediante Cox

source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))    
source(here("Code", "source", "tidy_pool.R")) 
source(here("Code", "source", "process_imp_cal_plot2.R")) 
source(here("Code", "source", "print_equation.R")) 
df_recal_metB <- import(here("Data", "Tidy", "equations", "df_recal_modB.rds"))

df_recal_metB2y <- df_recal_metB |> 
  filter(year == 2) |> 
  select(-year) |> 
  rename(st0_imp2y = st0_imp, 
         fc_coef_imp2y = fc_coef_imp)

df_recal_metB5y <- df_recal_metB |> 
  filter(year == 5) |> 
  select(-year) |> 
  rename(st0_imp5y = st0_imp, 
         fc_coef_imp5y = fc_coef_imp)

rm(df_recal_metB)

gc()
           used  (Mb) gc trigger (Mb)  max used   (Mb)
Ncells  4086638 218.3    7207984  385   7207984  385.0
Vcells 11653610  89.0   39316446  300 187462211 1430.3
imp.datosA <- imp.datosA2 |> 
  left_join(df_recal_metB2y, by = ".imp") |> 
  left_join(df_recal_metB5y, by = ".imp") |> 
  mutate(eventdf = factor(eventd), 
        risk2y = 1 - st0_imp2y ^ exp(fc_coef_imp2y * kfre_pi(imp.datosA2)), 
        risk5y = 1 - st0_imp5y ^ exp(fc_coef_imp5y * kfre_pi(imp.datosA2))) |> 
  select(.imp, .id, time, eventd, eventdf, risk2y, risk5y)

rm(df_recal_metB2y, df_recal_metB5y)
head(imp.datosA)

1.7.1 Calibration and Discrimination Measures

future::plan("multisession", workers = n_cores)
results_stack3a4_2y <- tidy_performance_stack(imp.datosA, 
                                  horizon = 2, 
                                  primary_event = 1, 
                                  pred = "risk2y",
                                  seed = seed, 
                                  n_cores = n_cores)

gc()
           used  (Mb) gc trigger (Mb)  max used   (Mb)
Ncells  4087930 218.4    7207984  385   7207984  385.0
Vcells 12017275  91.7   39316446  300 187462211 1430.3
rio::export(results_stack3a4_2y , here("Data", "Tidy", 
                                       "res_valext_kfre_stack3a4_2y_metB.rds"))

future::plan("multisession", workers = n_cores)
results_stack3a4_5y <- tidy_performance_stack(imp.datosA, 
                                  horizon = 5, 
                                  primary_event = 1, 
                                  pred = "risk5y",
                                  seed = seed, 
                                  n_cores = n_cores)

rio::export(results_stack3a4_5y, here("Data", "Tidy", 
                                      "res_valext_kfre_stack3a4_5y_metB.rds"))

gc()
           used  (Mb) gc trigger (Mb)  max used   (Mb)
Ncells  4088044 218.4    7207984  385   7207984  385.0
Vcells 12017543  91.7   39316446  300 187462211 1430.3
res_pool1 <- tidy_pool(results_stack3a4_2y) 

res_pool1 |> 
  kbl() |> 
  kable_classic(full_width = T, html_font = "Cambria")
term estimate ll ul pval ubar b t dfcom df riv lambda fmi n
Average predicted risk 0.0281935 NaN NaN NA NaN 0.0000000 NaN 30030 NA NaN NaN NaN 30031
Overall observerd risk 0.0273046 0.0254293 0.0291799 0.0000000 0.0000009 0.0000000 0.0000009 30030 30030.00000 0.0000000 0.0000000 0.0000666 30031
Log OE ratio -0.0320351 -0.1007684 0.0366983 0.3609698 0.0012279 0.0000014 0.0012297 30030 28989.54440 0.0015139 0.0015116 0.0000689 30031
OE difference -0.0008889 -0.0027657 0.0009880 0.3532645 0.0000009 0.0000000 0.0000009 30030 28857.26109 0.0016134 0.0016108 0.0000692 30031
Calibration Intercept -0.0523812 -0.1581912 0.0534289 0.3260322 0.0022879 0.0003826 0.0027981 30030 60.01153 0.2229924 0.1823334 0.0288466 30031
Calibration Slope 0.8493046 0.7625945 0.9360148 0.0000000 0.0010126 0.0004644 0.0016318 30029 13.88045 0.6114667 0.3794473 0.0960018 30031
Logit AUC 2.2297884 2.0794618 2.3801149 0.0000000 0.0035248 0.0012409 0.0051792 30030 19.58064 0.4693847 0.3194430 0.0744247 30031
Brier 0.0229523 0.0213269 0.0245776 0.0000000 0.0000005 0.0000001 0.0000007 30030 55.60918 0.2336973 0.1894284 0.0308923 30031
res_pool2 <- tidy_pool(results_stack3a4_5y) 

res_pool2 |> 
  kbl() |> 
  kable_classic(full_width = T, html_font = "Cambria")
term estimate ll ul pval ubar b t dfcom df riv lambda fmi n
Average predicted risk 0.0523210 NaN NaN NA NaN 0.00e+00 NaN 30030 NA NaN NaN NaN 30031
Overall observerd risk 0.0476187 0.0450853 0.0501521 0.0000000 0.0000017 0.00e+00 0.0000017 30030 30030.00000 0.0000000 0.0000000 0.0000666 30031
Log OE ratio -0.0941725 -0.1473790 -0.0409660 0.0005228 0.0007368 1.00e-07 0.0007369 30030 30010.28479 0.0001678 0.0001678 0.0000666 30031
OE difference -0.0047023 -0.0072360 -0.0021686 0.0002755 0.0000017 0.00e+00 0.0000017 30030 30003.44017 0.0002026 0.0002026 0.0000666 30031
Calibration Intercept -0.1398527 -0.2120131 -0.0676924 0.0001460 0.0013436 8.80e-06 0.0013553 30030 14141.47011 0.0086914 0.0086165 0.0001408 30031
Calibration Slope 0.8417300 0.7871257 0.8963344 0.0000000 0.0005867 1.13e-04 0.0007374 30029 47.77946 0.2568972 0.2043900 0.0353610 30031
Logit AUC 2.0378663 1.9444443 2.1312882 0.0000000 0.0022485 1.73e-05 0.0022715 30030 11756.41892 0.0102445 0.0101407 0.0001692 30031
Brier 0.0371864 0.0352595 0.0391134 0.0000000 0.0000009 1.00e-07 0.0000010 30030 194.08812 0.1125276 0.1011459 0.0096345 30031
tab_res_2y <- res_pool1 |> 
  select(term, estimate, ll, ul) |> 
  mutate(
    across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)), 
    across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1  + exp(.x)), .x)), 
    term = if_else(term == "Log OE ratio", "OE ratio", term), 
    term = if_else(term == "Logit AUC", "AUC", term), 
    across(c(estimate, ll, ul), ~ if_else(term %in% 
                                            c("Average predicted risk", 
                                              "Overall observerd risk", 
                                              "OE difference"), 100 * .x, .x)), 
    across(c(estimate, ll, ul), ~ round(.x, 2)), 
    measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"), 
                       term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
                       TRUE ~ str_glue("{estimate} ({ll} to {ul})")
                       )
    ) |> 
  select(term, measures) |> 
  mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"), 
         year = "2 years")

tab_res_5y <- res_pool2 |> 
  select(term, estimate, ll, ul) |> 
  mutate(
    across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)), 
    across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1  + exp(.x)), .x)), 
    term = if_else(term == "Log OE ratio", "OE ratio", term), 
    term = if_else(term == "Logit AUC", "AUC", term), 
    across(c(estimate, ll, ul), ~ if_else(term %in% 
                                            c("Average predicted risk", 
                                              "Overall observerd risk", 
                                              "OE difference"), 100 * .x, .x)), 
    across(c(estimate, ll, ul), ~ round(.x, 2)), 
    measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"), 
                       term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
                       TRUE ~ str_glue("{estimate} ({ll} to {ul})")
                       )
    ) |> 
  select(term, measures) |> 
  mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"), 
         year = "5 years")

tab_res0 <- tab_res_2y |>
  bind_rows(tab_res_5y)

tab_res0 |> 
  as_grouped_data(groups = "year") |> 
  as_grouped_data(groups = "grupo") |> 
  flextable::as_flextable(hide_grouplabel = TRUE) |> 
  autofit() |> 
  set_header_labels(
    year = "Time horizon", 
    term = "Performance measure", 
    measures = "Method B"
  ) |>  
  bold(j = 1) |> 
  set_caption("Table. Performance measures of KFRE in the external dataset of patients with CKD Stages 3a-4") |>  
  theme_booktabs() |>   
  bold(bold = TRUE, part = "header") -> table_perf_final

table_perf_final %>% 
  flextable::save_as_docx(path = here("Tables/Table_Imputed_Performance_metB.docx"))

table_perf_final

Time horizon

Performance measure

Method B

2 years

Calibration

Average predicted risk

2.82%

Overall observerd risk

2.73% (2.54% to 2.92%)

OE ratio

0.97 (0.9 to 1.04)

OE difference

-0.09% (-0.28% to 0.1%)

Calibration Intercept

-0.05 (-0.16 to 0.05)

Calibration Slope

0.85 (0.76 to 0.94)

Discrimination

AUC

0.9 (0.89 to 0.92)

Overall performance

Brier

0.02 (0.02 to 0.02)

5 years

Calibration

Average predicted risk

5.23%

Overall observerd risk

4.76% (4.51% to 5.02%)

OE ratio

0.91 (0.86 to 0.96)

OE difference

-0.47% (-0.72% to -0.22%)

Calibration Intercept

-0.14 (-0.21 to -0.07)

Calibration Slope

0.84 (0.79 to 0.9)

Discrimination

AUC

0.88 (0.87 to 0.89)

Overall performance

Brier

0.04 (0.04 to 0.04)

rm(list=ls()[! ls() %in% c("imp.datosA","imp.datosA2", "vdata", 
                           "primary_event", "horizon", 
                           "process_imp_cal_plot", "seed", "n_cores", "kfre_pi", 
                           "imputs")])
gc()
           used  (Mb) gc trigger (Mb)  max used   (Mb)
Ncells  4082843 218.1    7207984  385   7207984  385.0
Vcells 11090053  84.7   39316446  300 187462211 1430.3

1.7.2 Moderate calibration: Calibration curves lowess based on subdistributional hazards

primary_event <- 1

n_internal_knots <- 5

# Seleccion del grupo: Stages 3-4----

# A 2 años----
horizon <- 2

vdata <- imp.datosA %>% 
  rename(pred = risk2y) |> 
  select(.imp, .id, time, eventd, pred) |> 
  mutate(cll_pred = log(-log(1 - pred)))

rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <-  attr(rcs_vdata, "Boundary.knots")

pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))

vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))

future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
                         process_imp_cal_plot, 
                         vdata = vdata, 
                         primary_event = primary_event, 
                         horizon = horizon, 
                         type = "subdist_hazard", 
                         n_internal_knots = n_internal_knots, 
                         vdata_bis_pp, 
                         .options = furrr_options(seed = seed, 
                                                  packages = c("riskRegression", 
                                                               "survival", 
                                                               "splines", 
                                                               "cmprsk",
                                                               "tidyverse")), 
                         .progress = TRUE)
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |> 
  bind_rows() |> 
  filter(type == "observed")

subdist_df_stack <- subdist_df_imp_obs |>
  group_by(.imp) |> 
  mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |> 
  group_by(.imp, deciles_risk) |> 
  summarise(obs_mean_imp = mean(obs), 
            risk_mean_imp = mean(risk)) |> 
  group_by(deciles_risk) |> 
  summarise(obs_mean_pool = mean(obs_mean_imp), 
            risk_mean_pool = mean(risk_mean_imp))
  
subdist_df_imp_fix <- subdist_df_imp |> 
  bind_rows() |>  
  filter(type == "fixed") |> 
  arrange(risk) |> 
  summarise(obs_pool = mean(obs), 
            .by = risk) |> 
  mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))

rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_2y_metB.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_2y_metB.rds"))

# Grafico de calibracion
ggplot() +
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) + 
  geom_line(data = subdist_df_imp_fix, 
            aes(x = risk, y = obs_pool),
            alpha = 0.5, color = "black") +
  geom_point(data = subdist_df_stack,
             aes(x = risk_mean_pool, y = obs_mean_pool),
             shape = 23,
             stroke = 0.1,
             fill = "blue") + 
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_bw() + 
  labs(x = "Predicted risks", y = "Observed outcome proportions") + 
  # geom_rug(data = subdist_df_stack,
  #          aes(x = risk_mean_pool, y = obs_mean_pool),
  #          alpha = 0.1) +
  coord_fixed(ratio = 1, expand = TRUE)  -> plot_mod0_2y

# Grafico de calibracion con curvas imputadas
plot_mod0_2y + 
  geom_line(data = subdist_df_imp_obs, 
            aes(x = risk, y = obs, group = .imp),
            alpha = 0.1, color = "#38B8F7") -> plot_mod0_imp_2y

gc()
           used  (Mb) gc trigger (Mb)  max used   (Mb)
Ncells  4082116 218.1    7207984  385   7207984  385.0
Vcells 11306583  86.3   39316446  300 187462211 1430.3
# A 5 años----
horizon <- 5

vdata <- imp.datosA %>% 
  rename(pred = risk5y) |> 
  select(.imp, .id, time, eventd, pred) |> 
  mutate(cll_pred = log(-log(1 - pred))) 

rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <-  attr(rcs_vdata, "Boundary.knots")

pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))

vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))

future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
                         process_imp_cal_plot, 
                         vdata = vdata, 
                         primary_event = primary_event, 
                         horizon = horizon, 
                         type = "subdist_hazard", 
                         n_internal_knots = n_internal_knots, 
                         vdata_bis_pp, 
                         .options = furrr_options(seed = seed, 
                                                  packages = c("riskRegression", 
                                                               "survival", 
                                                               "splines", 
                                                               "cmprsk",
                                                               "tidyverse")), 
                         .progress = TRUE)
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |> 
  bind_rows() |> 
  filter(type == "observed")

subdist_df_stack <- subdist_df_imp_obs |>
  group_by(.imp) |> 
  mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |> 
  group_by(.imp, deciles_risk) |> 
  summarise(obs_mean_imp = mean(obs), 
            risk_mean_imp = mean(risk)) |> 
  group_by(deciles_risk) |> 
  summarise(obs_mean_pool = mean(obs_mean_imp), 
            risk_mean_pool = mean(risk_mean_imp))
  
subdist_df_imp_fix <- subdist_df_imp |> 
  bind_rows() |>  
  filter(type == "fixed") |> 
  arrange(risk) |> 
  summarise(obs_pool = mean(obs), 
            .by = risk) |> 
  mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))

rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_5y_metB.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_5y_metB.rds"))

# Grafico de calibracion
ggplot() +
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) + 
  geom_line(data = subdist_df_imp_fix, 
            aes(x = risk, y = obs_pool),
            alpha = 0.5, color = "black") +
  geom_point(data = subdist_df_stack,
             aes(x = risk_mean_pool, y = obs_mean_pool),
             shape = 23,
             stroke = 0.1,
             fill = "blue", 
             alpha = 0.5) + 
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_bw() + 
  labs(x = "Predicted risks", y = "Observed outcome proportions") + 
  # geom_rug(data = subdist_df_stack,
  #          aes(x = risk_mean_pool, y = obs_mean_pool),
  #          alpha = 0.1) +
  coord_fixed(ratio = 1, expand = TRUE)  -> plot_mod0_5y

# Grafico de calibracion con curvas imputadas
plot_mod0_5y + 
  geom_line(data = subdist_df_imp_obs, 
            aes(x = risk, y = obs, group = .imp),
            alpha = 0.3, color = "#38B8F7") -> plot_mod0_imp_5y

## Grafico final
plot_mod0_2y <- plot_mod0_2y + 
  labs(title = "Método B\n(2 year KFRE)") + 
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_5y <- plot_mod0_5y + 
  labs(title = "Método B\n(5 year KFRE)") + 
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_imp_2y <- plot_mod0_imp_2y +
  labs(title = "Método B\n(2 year KFRE)") +
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_imp_5y <- plot_mod0_imp_5y +
  labs(title = "Método B\n(5 year KFRE)") +
  theme(plot.title = element_text(hjust = 0.5))

plot_cal_mod0 <- plot_mod0_2y / plot_mod0_5y
plot_cal_imp_mod0 <- plot_mod0_imp_2y / plot_mod0_imp_5y


export(plot_mod0_2y, here("Data", "Tidy", "plot_metB_2y.rds"))
export(plot_mod0_5y, here("Data", "Tidy", "plot_metB_5y.rds"))
export(plot_cal_mod0, here("Data", "Tidy", "plot_cal_metB.rds"))

export(plot_mod0_imp_2y, here("Data", "Tidy", "plot_metB_imp_2y.rds"))
export(plot_mod0_imp_5y, here("Data", "Tidy", "plot_metB_imp_5y.rds"))
export(plot_cal_imp_mod0, here("Data", "Tidy", "plot_cal_imp_metB.rds"))


ggsave(filename = "Plot_Calibration_imputed_metB.png", 
       device = "png", 
       plot = plot_cal_mod0, 
       path = here("Figures"), 
       scale = 2, 
       width = 12, 
       height = 12,
       units = "cm", 
       dpi = 600)

ggsave(filename = "Plot_Calibration_imputed_estabilidad_metB.png", 
       device = "png", 
       plot = plot_cal_imp_mod0, 
       path = here("Figures"), 
       scale = 2, 
       width = 12, 
       height = 12,
       units = "cm", 
       dpi = 600)

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4086165 218.3    7207984 385.0   7207984  385.0
Vcells 11674768  89.1   39339461 300.2 187462211 1430.3
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_metB.png"))

Calibration curves for each group and prediction horizon
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_estabilidad_metB.png"))

Calibration curves for each group and prediction horizon

1.8 Metodo C: Reestimar riesgo basal usando Cause-specific Hazard Models

source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))    
source(here("Code", "source", "tidy_pool.R")) 
source(here("Code", "source", "process_imp_cal_plot2.R")) 
source(here("Code", "source", "print_equation.R")) 
df_recal_metC <- import(here("Data", "Tidy", "equations", "df_recal_modC.rds"))

df_recal_metC2y <- df_recal_metC |> 
  filter(year == 2) |> 
  select(-year) |> 
  rename(st0_imp2y = st0_imp, 
         fc_coef_imp2y = fc_coef_imp)

df_recal_metC5y <- df_recal_metC |> 
  filter(year == 5) |> 
  select(-year) |> 
  rename(st0_imp5y = st0_imp, 
         fc_coef_imp5y = fc_coef_imp)

rm(df_recal_metC)

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4087358 218.3    7207984 385.0   7207984  385.0
Vcells 11662608  89.0   52534569 400.9 187462211 1430.3
imp.datosA <- imp.datosA2 |> 
  left_join(df_recal_metC2y, by = ".imp") |> 
  left_join(df_recal_metC5y, by = ".imp") |> 
  mutate(eventdf = factor(eventd), 
        risk2y = 1 - st0_imp2y ^ exp(fc_coef_imp2y * kfre_pi(imp.datosA2)), 
        risk5y = 1 - st0_imp5y ^ exp(fc_coef_imp5y * kfre_pi(imp.datosA2))) |> 
  select(.imp, .id, time, eventd, eventdf, risk2y, risk5y)

rm(df_recal_metC2y, df_recal_metC5y)
head(imp.datosA)

1.8.1 Calibration and Discrimination Measures

future::plan("multisession", workers = n_cores)
results_stack3a4_2y <- tidy_performance_stack(imp.datosA, 
                                  horizon = 2, 
                                  primary_event = 1, 
                                  pred = "risk2y",
                                  seed = seed, 
                                  n_cores = n_cores)

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4088314 218.4    7207984 385.0   7207984  385.0
Vcells 12024235  91.8   42027656 320.7 187462211 1430.3
rio::export(results_stack3a4_2y , here("Data", "Tidy", 
                                       "res_valext_kfre_stack3a4_2y_metC.rds"))

future::plan("multisession", workers = n_cores)
results_stack3a4_5y <- tidy_performance_stack(imp.datosA, 
                                  horizon = 5, 
                                  primary_event = 1, 
                                  pred = "risk5y",
                                  seed = seed, 
                                  n_cores = n_cores)

rio::export(results_stack3a4_5y, here("Data", "Tidy", 
                                      "res_valext_kfre_stack3a4_5y_metC.rds"))

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4088309 218.4    7207984 385.0   7207984  385.0
Vcells 12024308  91.8   42027656 320.7 187462211 1430.3
res_pool1 <- tidy_pool(results_stack3a4_2y) 

res_pool1 |> 
  kbl() |> 
  kable_classic(full_width = T, html_font = "Cambria")
term estimate ll ul pval ubar b t dfcom df riv lambda fmi n
Average predicted risk 0.0248293 NaN NaN NA NaN 0.0000000 NaN 30030 NA NaN NaN NaN 30031
Overall observerd risk 0.0273046 0.0254293 0.0291799 0.0000000 0.0000009 0.0000000 0.0000009 30030 30030.000000 0.0000000 0.0000000 0.0000666 30031
Log OE ratio 0.0950470 0.0241751 0.1659188 0.0086603 0.0012279 0.0000558 0.0013023 30030 599.688156 0.0605974 0.0571352 0.0032237 30031
OE difference 0.0024752 0.0005511 0.0043994 0.0117536 0.0000009 0.0000000 0.0000010 30030 859.040853 0.0498920 0.0475211 0.0022649 30031
Calibration Intercept -0.3813514 -0.5512420 -0.2114609 0.0004315 0.0034439 0.0018988 0.0059757 30030 11.134817 0.7351346 0.4236758 0.1115207 30031
Calibration Slope 0.6270993 0.5449601 0.7092384 0.0000005 0.0005521 0.0004844 0.0011979 30029 6.876937 1.1699029 0.5391499 0.1479052 30031
Logit AUC 2.2297884 2.0794618 2.3801149 0.0000000 0.0035248 0.0012409 0.0051792 30030 19.580638 0.4693847 0.3194430 0.0744247 30031
Brier 0.0238312 0.0220036 0.0256588 0.0000000 0.0000006 0.0000002 0.0000008 30030 26.672392 0.3767740 0.2736644 0.0581799 30031
res_pool2 <- tidy_pool(results_stack3a4_5y) 

res_pool2 |> 
  kbl() |> 
  kable_classic(full_width = T, html_font = "Cambria")
term estimate ll ul pval ubar b t dfcom df riv lambda fmi n
Average predicted risk 0.0427432 NaN NaN NA NaN 1.00e-07 NaN 30030 NA NaN NaN NaN 30031
Overall observerd risk 0.0476187 0.0450853 0.0501521 0.0000000 0.0000017 0.00e+00 0.0000017 30030 30030.00000 0.0000000 0.0000000 0.0000666 30031
Log OE ratio 0.1080349 0.0519826 0.1640872 0.0001853 0.0007368 5.48e-05 0.0008098 30030 243.75412 0.0991142 0.0901764 0.0077398 30031
OE difference 0.0048756 0.0022347 0.0075164 0.0003234 0.0000017 1.00e-07 0.0000018 30030 363.80816 0.0795149 0.0736580 0.0052516 30031
Calibration Intercept -0.2568547 -0.3447924 -0.1689171 0.0000000 0.0019609 3.78e-05 0.0020113 30030 2870.42137 0.0257134 0.0250688 0.0006873 30031
Calibration Slope 0.6214536 0.5681692 0.6747380 0.0000000 0.0003198 1.91e-04 0.0005745 30029 10.16851 0.7964856 0.4433576 0.1182095 30031
Logit AUC 2.0378663 1.9444443 2.1312882 0.0000000 0.0022485 1.73e-05 0.0022715 30030 11756.41892 0.0102445 0.0101407 0.0001692 30031
Brier 0.0384086 0.0362677 0.0405495 0.0000000 0.0000010 1.00e-07 0.0000012 30030 100.52540 0.1638412 0.1407762 0.0179591 30031
tab_res_2y <- res_pool1 |> 
  select(term, estimate, ll, ul) |> 
  mutate(
    across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)), 
    across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1  + exp(.x)), .x)), 
    term = if_else(term == "Log OE ratio", "OE ratio", term), 
    term = if_else(term == "Logit AUC", "AUC", term), 
    across(c(estimate, ll, ul), ~ if_else(term %in% 
                                            c("Average predicted risk", 
                                              "Overall observerd risk", 
                                              "OE difference"), 100 * .x, .x)), 
    across(c(estimate, ll, ul), ~ round(.x, 2)), 
    measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"), 
                       term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
                       TRUE ~ str_glue("{estimate} ({ll} to {ul})")
                       )
    ) |> 
  select(term, measures) |> 
  mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"), 
         year = "2 years")

tab_res_5y <- res_pool2 |> 
  select(term, estimate, ll, ul) |> 
  mutate(
    across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)), 
    across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1  + exp(.x)), .x)), 
    term = if_else(term == "Log OE ratio", "OE ratio", term), 
    term = if_else(term == "Logit AUC", "AUC", term), 
    across(c(estimate, ll, ul), ~ if_else(term %in% 
                                            c("Average predicted risk", 
                                              "Overall observerd risk", 
                                              "OE difference"), 100 * .x, .x)), 
    across(c(estimate, ll, ul), ~ round(.x, 2)), 
    measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"), 
                       term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
                       TRUE ~ str_glue("{estimate} ({ll} to {ul})")
                       )
    ) |> 
  select(term, measures) |> 
  mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"), 
         year = "5 years")

tab_res0 <- tab_res_2y |>
  bind_rows(tab_res_5y)

tab_res0 |> 
  as_grouped_data(groups = "year") |> 
  as_grouped_data(groups = "grupo") |> 
  flextable::as_flextable(hide_grouplabel = TRUE) |> 
  autofit() |> 
  set_header_labels(
    year = "Time horizon", 
    term = "Performance measure", 
    measures = "Method C"
  ) |>  
  bold(j = 1) |> 
  set_caption("Table. Performance measures of KFRE in the external dataset of patients with CKD Stages 3a-4") |>  
  theme_booktabs() |>   
  bold(bold = TRUE, part = "header") -> table_perf_final

table_perf_final %>% 
  flextable::save_as_docx(path = here("Tables/Table_Imputed_Performance_metC.docx"))

table_perf_final

Time horizon

Performance measure

Method C

2 years

Calibration

Average predicted risk

2.48%

Overall observerd risk

2.73% (2.54% to 2.92%)

OE ratio

1.1 (1.02 to 1.18)

OE difference

0.25% (0.06% to 0.44%)

Calibration Intercept

-0.38 (-0.55 to -0.21)

Calibration Slope

0.63 (0.54 to 0.71)

Discrimination

AUC

0.9 (0.89 to 0.92)

Overall performance

Brier

0.02 (0.02 to 0.03)

5 years

Calibration

Average predicted risk

4.27%

Overall observerd risk

4.76% (4.51% to 5.02%)

OE ratio

1.11 (1.05 to 1.18)

OE difference

0.49% (0.22% to 0.75%)

Calibration Intercept

-0.26 (-0.34 to -0.17)

Calibration Slope

0.62 (0.57 to 0.67)

Discrimination

AUC

0.88 (0.87 to 0.89)

Overall performance

Brier

0.04 (0.04 to 0.04)

rm(list=ls()[! ls() %in% c("imp.datosA", "imp.datosA2", "vdata", 
                           "primary_event", "horizon", 
                           "process_imp_cal_plot", "seed", "n_cores", "kfre_pi", 
                           "imputs")])
gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4083042 218.1    7207984 385.0   7207984  385.0
Vcells 11096693  84.7   42027656 320.7 187462211 1430.3

1.8.2 Moderate calibration: Calibration curves lowess based on subdistributional hazards

primary_event <- 1

n_internal_knots <- 5

# Seleccion del grupo: Stages 3-4----

# A 2 años----
horizon <- 2

vdata <- imp.datosA %>% 
  rename(pred = risk2y) |> 
  select(.imp, .id, time, eventd, pred) |> 
  mutate(cll_pred = log(-log(1 - pred)))

rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <-  attr(rcs_vdata, "Boundary.knots")

pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))

vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))

future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
                         process_imp_cal_plot, 
                         vdata = vdata, 
                         primary_event = primary_event, 
                         horizon = horizon, 
                         type = "subdist_hazard", 
                         n_internal_knots = n_internal_knots, 
                         vdata_bis_pp, 
                         .options = furrr_options(seed = seed, 
                                                  packages = c("riskRegression", 
                                                               "survival", 
                                                               "splines", 
                                                               "cmprsk",
                                                               "tidyverse")), 
                         .progress = TRUE)
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |> 
  bind_rows() |> 
  filter(type == "observed")

subdist_df_stack <- subdist_df_imp_obs |>
  group_by(.imp) |> 
  mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |> 
  group_by(.imp, deciles_risk) |> 
  summarise(obs_mean_imp = mean(obs), 
            risk_mean_imp = mean(risk)) |> 
  group_by(deciles_risk) |> 
  summarise(obs_mean_pool = mean(obs_mean_imp), 
            risk_mean_pool = mean(risk_mean_imp))
  
subdist_df_imp_fix <- subdist_df_imp |> 
  bind_rows() |>  
  filter(type == "fixed") |> 
  arrange(risk) |> 
  summarise(obs_pool = mean(obs), 
            .by = risk) |> 
  mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))

rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_2y_metC.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_2y_metC.rds"))

# Grafico de calibracion
ggplot() +
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) + 
  geom_line(data = subdist_df_imp_fix, 
            aes(x = risk, y = obs_pool),
            alpha = 0.5, color = "black") +
  geom_point(data = subdist_df_stack,
             aes(x = risk_mean_pool, y = obs_mean_pool),
             shape = 23,
             stroke = 0.1,
             fill = "blue") + 
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_bw() + 
  labs(x = "Predicted risks", y = "Observed outcome proportions") + 
  # geom_rug(data = subdist_df_stack,
  #          aes(x = risk_mean_pool, y = obs_mean_pool),
  #          alpha = 0.1) +
  coord_fixed(ratio = 1, expand = TRUE)  -> plot_mod0_2y

# Grafico de calibracion con curvas imputadas
plot_mod0_2y + 
  geom_line(data = subdist_df_imp_obs, 
            aes(x = risk, y = obs, group = .imp),
            alpha = 0.1, color = "#38B8F7") -> plot_mod0_imp_2y

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4082330 218.1    7207984 385.0   7207984  385.0
Vcells 11313246  86.4   42027656 320.7 187462211 1430.3
# A 5 años----
horizon <- 5

vdata <- imp.datosA %>% 
  rename(pred = risk5y) |> 
  select(.imp, .id, time, eventd, pred) |> 
  mutate(cll_pred = log(-log(1 - pred))) 

rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <-  attr(rcs_vdata, "Boundary.knots")

pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))

vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))

future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
                         process_imp_cal_plot, 
                         vdata = vdata, 
                         primary_event = primary_event, 
                         horizon = horizon, 
                         type = "subdist_hazard", 
                         n_internal_knots = n_internal_knots, 
                         vdata_bis_pp, 
                         .options = furrr_options(seed = seed, 
                                                  packages = c("riskRegression", 
                                                               "survival", 
                                                               "splines", 
                                                               "cmprsk",
                                                               "tidyverse")), 
                         .progress = TRUE)
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |> 
  bind_rows() |> 
  filter(type == "observed")

subdist_df_stack <- subdist_df_imp_obs |>
  group_by(.imp) |> 
  mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |> 
  group_by(.imp, deciles_risk) |> 
  summarise(obs_mean_imp = mean(obs), 
            risk_mean_imp = mean(risk)) |> 
  group_by(deciles_risk) |> 
  summarise(obs_mean_pool = mean(obs_mean_imp), 
            risk_mean_pool = mean(risk_mean_imp))
  
subdist_df_imp_fix <- subdist_df_imp |> 
  bind_rows() |>  
  filter(type == "fixed") |> 
  arrange(risk) |> 
  summarise(obs_pool = mean(obs), 
            .by = risk) |> 
  mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))

rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_5y_metC.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_5y_metC.rds"))

# Grafico de calibracion
ggplot() +
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) + 
  geom_line(data = subdist_df_imp_fix, 
            aes(x = risk, y = obs_pool),
            alpha = 0.5, color = "black") +
  geom_point(data = subdist_df_stack,
             aes(x = risk_mean_pool, y = obs_mean_pool),
             shape = 23,
             stroke = 0.1,
             fill = "blue", 
             alpha = 0.5) + 
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_bw() + 
  labs(x = "Predicted risks", y = "Observed outcome proportions") + 
  # geom_rug(data = subdist_df_stack,
  #          aes(x = risk_mean_pool, y = obs_mean_pool),
  #          alpha = 0.1) +
  coord_fixed(ratio = 1, expand = TRUE)  -> plot_mod0_5y

# Grafico de calibracion con curvas imputadas
plot_mod0_5y + 
  geom_line(data = subdist_df_imp_obs, 
            aes(x = risk, y = obs, group = .imp),
            alpha = 0.3, color = "#38B8F7") -> plot_mod0_imp_5y

## Grafico final
plot_mod0_2y <- plot_mod0_2y + 
  labs(title = "Método C\n(2 year KFRE)") + 
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_5y <- plot_mod0_5y + 
  labs(title = "Método C\n(5 year KFRE)") + 
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_imp_2y <- plot_mod0_imp_2y +
  labs(title = "Método C\n(2 year KFRE)") +
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_imp_5y <- plot_mod0_imp_5y +
  labs(title = "Método C\n(5 year KFRE)") +
  theme(plot.title = element_text(hjust = 0.5))

plot_cal_mod0 <- plot_mod0_2y / plot_mod0_5y
plot_cal_imp_mod0 <- plot_mod0_imp_2y / plot_mod0_imp_5y


export(plot_mod0_2y, here("Data", "Tidy", "plot_metC_2y.rds"))
export(plot_mod0_5y, here("Data", "Tidy", "plot_metC_5y.rds"))
export(plot_cal_mod0, here("Data", "Tidy", "plot_cal_metC.rds"))

export(plot_mod0_imp_2y, here("Data", "Tidy", "plot_metC_imp_2y.rds"))
export(plot_mod0_imp_5y, here("Data", "Tidy", "plot_metC_imp_5y.rds"))
export(plot_cal_imp_mod0, here("Data", "Tidy", "plot_cal_imp_metC.rds"))


ggsave(filename = "Plot_Calibration_imputed_metC.png", 
       device = "png", 
       plot = plot_cal_mod0, 
       path = here("Figures"), 
       scale = 2, 
       width = 12, 
       height = 12,
       units = "cm", 
       dpi = 600)

ggsave(filename = "Plot_Calibration_imputed_estabilidad_metC.png", 
       device = "png", 
       plot = plot_cal_imp_mod0, 
       path = here("Figures"), 
       scale = 2, 
       width = 12, 
       height = 12,
       units = "cm", 
       dpi = 600)

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4086326 218.3    7207984 385.0   7207984  385.0
Vcells 11681344  89.2   42045987 320.8 187462211 1430.3
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_metC.png"))

Calibration curves for each group and prediction horizon
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_estabilidad_metC.png"))

Calibration curves for each group and prediction horizon

1.9 Metodo D: Reestimar coeficientes mediante Cause-specific Hazard Models

source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))    
source(here("Code", "source", "tidy_pool.R")) 
source(here("Code", "source", "process_imp_cal_plot2.R")) 
source(here("Code", "source", "print_equation.R")) 
df_recal_metD <- import(here("Data", "Tidy", "equations", "df_recal_modD.rds"))

df_recal_metD2y <- df_recal_metD |> 
  filter(year == 2) |> 
  select(-year) |> 
  rename(st0_imp2y = st0_imp, 
         fc_coef_imp2y = fc_coef_imp)

df_recal_metD5y <- df_recal_metD |> 
  filter(year == 5) |> 
  select(-year) |> 
  rename(st0_imp5y = st0_imp, 
         fc_coef_imp5y = fc_coef_imp)

rm(df_recal_metD)

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4087558 218.3    7207984 385.0   7207984  385.0
Vcells 11669250  89.1   52542219 400.9 187462211 1430.3
imp.datosA <- imp.datosA2 |> 
  left_join(df_recal_metD2y, by = ".imp") |> 
  left_join(df_recal_metD5y, by = ".imp") |> 
  mutate(eventdf = factor(eventd), 
        risk2y = 1 - st0_imp2y ^ exp(fc_coef_imp2y * kfre_pi(imp.datosA2)), 
        risk5y = 1 - st0_imp5y ^ exp(fc_coef_imp5y * kfre_pi(imp.datosA2))) |> 
  select(.imp, .id, time, eventd, eventdf, risk2y, risk5y)

rm(df_recal_metD2y, df_recal_metD5y)
head(imp.datosA)

1.9.1 Calibration and Discrimination Measures

future::plan("multisession", workers = n_cores)
results_stack3a4_2y <- tidy_performance_stack(imp.datosA, 
                                  horizon = 2, 
                                  primary_event = 1, 
                                  pred = "risk2y",
                                  seed = seed, 
                                  n_cores = n_cores)

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4088458 218.4    7207984 385.0   7207984  385.0
Vcells 12030787  91.8   42033776 320.7 187462211 1430.3
rio::export(results_stack3a4_2y , here("Data", "Tidy", 
                                       "res_valext_kfre_stack3a4_2y_metD.rds"))

future::plan("multisession", workers = n_cores)
results_stack3a4_5y <- tidy_performance_stack(imp.datosA, 
                                  horizon = 5, 
                                  primary_event = 1, 
                                  pred = "risk5y",
                                  seed = seed, 
                                  n_cores = n_cores)

rio::export(results_stack3a4_5y, here("Data", "Tidy", 
                                      "res_valext_kfre_stack3a4_5y_metD.rds"))

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4088458 218.4    7207984 385.0   7207984  385.0
Vcells 12030870  91.8   42033776 320.7 187462211 1430.3
res_pool1 <- tidy_pool(results_stack3a4_2y) 

res_pool1 |> 
  kbl() |> 
  kable_classic(full_width = T, html_font = "Cambria")
term estimate ll ul pval ubar b t dfcom df riv lambda fmi n
Average predicted risk 0.0271243 NaN NaN NA NaN 0.0000000 NaN 30030 NA NaN NaN NaN 30031
Overall observerd risk 0.0273046 0.0254293 0.0291799 0.0000000 0.0000009 0.0000000 0.0000009 30030 30030.00000 0.0000000 0.0000000 0.0000666 30031
Log OE ratio 0.0066260 -0.0622337 0.0754858 0.8504016 0.0012279 0.0000048 0.0012342 30030 21410.19652 0.0051710 0.0051444 0.0000932 30031
OE difference 0.0001803 -0.0016998 0.0020604 0.8509201 0.0000009 0.0000000 0.0000009 30030 21582.14312 0.0050979 0.0050720 0.0000924 30031
Calibration Intercept -0.0091549 -0.1138241 0.0955143 0.8620119 0.0022879 0.0003494 0.0027538 30030 69.69343 0.2036091 0.1691654 0.0251857 30031
Calibration Slope 0.8493046 0.7625945 0.9360148 0.0000000 0.0010126 0.0004644 0.0016318 30029 13.88045 0.6114667 0.3794473 0.0960018 30031
Logit AUC 2.2297884 2.0794618 2.3801149 0.0000000 0.0035248 0.0012409 0.0051792 30030 19.58064 0.4693847 0.3194430 0.0744247 30031
Brier 0.0229436 0.0213151 0.0245721 0.0000000 0.0000005 0.0000001 0.0000007 30030 58.94212 0.2254656 0.1839836 0.0293180 30031
res_pool2 <- tidy_pool(results_stack3a4_5y) 

res_pool2 |> 
  kbl() |> 
  kable_classic(full_width = T, html_font = "Cambria")
term estimate ll ul pval ubar b t dfcom df riv lambda fmi n
Average predicted risk 0.0471597 NaN NaN NA NaN 0.00e+00 NaN 30030 NA NaN NaN NaN 30031
Overall observerd risk 0.0476187 0.0450853 0.0501521 0.0000000 0.0000017 0.00e+00 0.0000017 30030 30030.00000 0.0000000 0.0000000 0.0000666 30031
Log OE ratio 0.0096887 -0.0437872 0.0631645 0.7224911 0.0007368 5.60e-06 0.0007443 30030 11824.80583 0.0101951 0.0100922 0.0001682 30031
OE difference 0.0004590 -0.0020871 0.0030052 0.7237997 0.0000017 0.00e+00 0.0000017 30030 12117.43427 0.0099872 0.0098885 0.0001642 30031
Calibration Intercept -0.0152086 -0.0877588 0.0573416 0.6811130 0.0013436 1.94e-05 0.0013695 30030 4702.70409 0.0192693 0.0189050 0.0004210 30031
Calibration Slope 0.8417300 0.7871257 0.8963344 0.0000000 0.0005867 1.13e-04 0.0007374 30029 47.77946 0.2568972 0.2043900 0.0353610 30031
Logit AUC 2.0378663 1.9444443 2.1312882 0.0000000 0.0022485 1.73e-05 0.0022715 30030 11756.41892 0.0102445 0.0101407 0.0001692 30031
Brier 0.0370930 0.0351267 0.0390593 0.0000000 0.0000009 1.00e-07 0.0000010 30030 168.26334 0.1219319 0.1086803 0.0110433 30031
tab_res_2y <- res_pool1 |> 
  select(term, estimate, ll, ul) |> 
  mutate(
    across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)), 
    across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1  + exp(.x)), .x)), 
    term = if_else(term == "Log OE ratio", "OE ratio", term), 
    term = if_else(term == "Logit AUC", "AUC", term), 
    across(c(estimate, ll, ul), ~ if_else(term %in% 
                                            c("Average predicted risk", 
                                              "Overall observerd risk", 
                                              "OE difference"), 100 * .x, .x)), 
    across(c(estimate, ll, ul), ~ round(.x, 2)), 
    measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"), 
                       term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
                       TRUE ~ str_glue("{estimate} ({ll} to {ul})")
                       )
    ) |> 
  select(term, measures) |> 
  mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"), 
         year = "2 years")

tab_res_5y <- res_pool2 |> 
  select(term, estimate, ll, ul) |> 
  mutate(
    across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)), 
    across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1  + exp(.x)), .x)), 
    term = if_else(term == "Log OE ratio", "OE ratio", term), 
    term = if_else(term == "Logit AUC", "AUC", term), 
    across(c(estimate, ll, ul), ~ if_else(term %in% 
                                            c("Average predicted risk", 
                                              "Overall observerd risk", 
                                              "OE difference"), 100 * .x, .x)), 
    across(c(estimate, ll, ul), ~ round(.x, 2)), 
    measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"), 
                       term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
                       TRUE ~ str_glue("{estimate} ({ll} to {ul})")
                       )
    ) |> 
  select(term, measures) |> 
  mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"), 
         year = "5 years")

tab_res0 <- tab_res_2y |>
  bind_rows(tab_res_5y)

tab_res0 |> 
  as_grouped_data(groups = "year") |> 
  as_grouped_data(groups = "grupo") |> 
  flextable::as_flextable(hide_grouplabel = TRUE) |> 
  autofit() |> 
  set_header_labels(
    year = "Time horizon", 
    term = "Performance measure", 
    measures = "Method D"
  ) |>  
  bold(j = 1) |> 
  set_caption("Table. Performance measures of KFRE in the external dataset of patients with CKD Stages 3a-4") |>  
  theme_booktabs() |>   
  bold(bold = TRUE, part = "header") -> table_perf_final

table_perf_final %>% 
  flextable::save_as_docx(path = here("Tables/Table_Imputed_Performance_metD.docx"))

table_perf_final

Time horizon

Performance measure

Method D

2 years

Calibration

Average predicted risk

2.71%

Overall observerd risk

2.73% (2.54% to 2.92%)

OE ratio

1.01 (0.94 to 1.08)

OE difference

0.02% (-0.17% to 0.21%)

Calibration Intercept

-0.01 (-0.11 to 0.1)

Calibration Slope

0.85 (0.76 to 0.94)

Discrimination

AUC

0.9 (0.89 to 0.92)

Overall performance

Brier

0.02 (0.02 to 0.02)

5 years

Calibration

Average predicted risk

4.72%

Overall observerd risk

4.76% (4.51% to 5.02%)

OE ratio

1.01 (0.96 to 1.07)

OE difference

0.05% (-0.21% to 0.3%)

Calibration Intercept

-0.02 (-0.09 to 0.06)

Calibration Slope

0.84 (0.79 to 0.9)

Discrimination

AUC

0.88 (0.87 to 0.89)

Overall performance

Brier

0.04 (0.04 to 0.04)

rm(list=ls()[! ls() %in% c("imp.datosA", "imp.datosA2", "vdata", 
                           "primary_event", "horizon", 
                           "process_imp_cal_plot", "seed", "n_cores", "kfre_pi", 
                           "imputs")])
gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4083245 218.1    7207984 385.0   7207984  385.0
Vcells 11103353  84.8   42033776 320.7 187462211 1430.3

1.9.2 Moderate calibration: Calibration curves lowess based on subdistributional hazards

primary_event <- 1

n_internal_knots <- 5

# Seleccion del grupo: Stages 3-4----

# A 2 años----
horizon <- 2

vdata <- imp.datosA %>% 
  rename(pred = risk2y) |> 
  select(.imp, .id, time, eventd, pred) |> 
  mutate(cll_pred = log(-log(1 - pred)))

rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <-  attr(rcs_vdata, "Boundary.knots")

pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))

vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))

future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
                         process_imp_cal_plot, 
                         vdata = vdata, 
                         primary_event = primary_event, 
                         horizon = horizon, 
                         type = "subdist_hazard", 
                         n_internal_knots = n_internal_knots, 
                         vdata_bis_pp, 
                         .options = furrr_options(seed = seed, 
                                                  packages = c("riskRegression", 
                                                               "survival", 
                                                               "splines", 
                                                               "cmprsk",
                                                               "tidyverse")), 
                         .progress = TRUE)
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |> 
  bind_rows() |> 
  filter(type == "observed")

subdist_df_stack <- subdist_df_imp_obs |>
  group_by(.imp) |> 
  mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |> 
  group_by(.imp, deciles_risk) |> 
  summarise(obs_mean_imp = mean(obs), 
            risk_mean_imp = mean(risk)) |> 
  group_by(deciles_risk) |> 
  summarise(obs_mean_pool = mean(obs_mean_imp), 
            risk_mean_pool = mean(risk_mean_imp))
  
subdist_df_imp_fix <- subdist_df_imp |> 
  bind_rows() |>  
  filter(type == "fixed") |> 
  arrange(risk) |> 
  summarise(obs_pool = mean(obs), 
            .by = risk) |> 
  mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))

rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_2y_metD.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_2y_metD.rds"))

# Grafico de calibracion
ggplot() +
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) + 
  geom_line(data = subdist_df_imp_fix, 
            aes(x = risk, y = obs_pool),
            alpha = 0.5, color = "black") +
  geom_point(data = subdist_df_stack,
             aes(x = risk_mean_pool, y = obs_mean_pool),
             shape = 23,
             stroke = 0.1,
             fill = "blue") + 
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_bw() + 
  labs(x = "Predicted risks", y = "Observed outcome proportions") + 
  # geom_rug(data = subdist_df_stack,
  #          aes(x = risk_mean_pool, y = obs_mean_pool),
  #          alpha = 0.1) +
  coord_fixed(ratio = 1, expand = TRUE)  -> plot_mod0_2y

# Grafico de calibracion con curvas imputadas
plot_mod0_2y + 
  geom_line(data = subdist_df_imp_obs, 
            aes(x = risk, y = obs, group = .imp),
            alpha = 0.1, color = "#38B8F7") -> plot_mod0_imp_2y

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4082519 218.1    7207984 385.0   7207984  385.0
Vcells 11319890  86.4   42033776 320.7 187462211 1430.3
# A 5 años----
horizon <- 5

vdata <- imp.datosA %>% 
  rename(pred = risk5y) |> 
  select(.imp, .id, time, eventd, pred) |> 
  mutate(cll_pred = log(-log(1 - pred))) 

rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <-  attr(rcs_vdata, "Boundary.knots")

pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))

vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))

future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
                         process_imp_cal_plot, 
                         vdata = vdata, 
                         primary_event = primary_event, 
                         horizon = horizon, 
                         type = "subdist_hazard", 
                         n_internal_knots = n_internal_knots, 
                         vdata_bis_pp, 
                         .options = furrr_options(seed = seed, 
                                                  packages = c("riskRegression", 
                                                               "survival", 
                                                               "splines", 
                                                               "cmprsk",
                                                               "tidyverse")), 
                         .progress = TRUE)
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |> 
  bind_rows() |> 
  filter(type == "observed")

subdist_df_stack <- subdist_df_imp_obs |>
  group_by(.imp) |> 
  mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |> 
  group_by(.imp, deciles_risk) |> 
  summarise(obs_mean_imp = mean(obs), 
            risk_mean_imp = mean(risk)) |> 
  group_by(deciles_risk) |> 
  summarise(obs_mean_pool = mean(obs_mean_imp), 
            risk_mean_pool = mean(risk_mean_imp))
  
subdist_df_imp_fix <- subdist_df_imp |> 
  bind_rows() |>  
  filter(type == "fixed") |> 
  arrange(risk) |> 
  summarise(obs_pool = mean(obs), 
            .by = risk) |> 
  mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))

rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_5y_metD.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_5y_metD.rds"))

# Grafico de calibracion
ggplot() +
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) + 
  geom_line(data = subdist_df_imp_fix, 
            aes(x = risk, y = obs_pool),
            alpha = 0.5, color = "black") +
  geom_point(data = subdist_df_stack,
             aes(x = risk_mean_pool, y = obs_mean_pool),
             shape = 23,
             stroke = 0.1,
             fill = "blue", 
             alpha = 0.5) + 
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
  theme_bw() + 
  labs(x = "Predicted risks", y = "Observed outcome proportions") + 
  # geom_rug(data = subdist_df_stack,
  #          aes(x = risk_mean_pool, y = obs_mean_pool),
  #          alpha = 0.1) +
  coord_fixed(ratio = 1, expand = TRUE)  -> plot_mod0_5y

# Grafico de calibracion con curvas imputadas
plot_mod0_5y + 
  geom_line(data = subdist_df_imp_obs, 
            aes(x = risk, y = obs, group = .imp),
            alpha = 0.3, color = "#38B8F7") -> plot_mod0_imp_5y

## Grafico final
plot_mod0_2y <- plot_mod0_2y + 
  labs(title = "Método D\n(2 year KFRE)") + 
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_5y <- plot_mod0_5y + 
  labs(title = "Método D\n(5 year KFRE)") + 
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_imp_2y <- plot_mod0_imp_2y +
  labs(title = "Método D\n(2 year KFRE)") +
  theme(plot.title = element_text(hjust = 0.5))

plot_mod0_imp_5y <- plot_mod0_imp_5y +
  labs(title = "Método D\n(5 year KFRE)") +
  theme(plot.title = element_text(hjust = 0.5))

plot_cal_mod0 <- plot_mod0_2y / plot_mod0_5y
plot_cal_imp_mod0 <- plot_mod0_imp_2y / plot_mod0_imp_5y


export(plot_mod0_2y, here("Data", "Tidy", "plot_metD_2y.rds"))
export(plot_mod0_5y, here("Data", "Tidy", "plot_metD_5y.rds"))
export(plot_cal_mod0, here("Data", "Tidy", "plot_cal_metD.rds"))

export(plot_mod0_imp_2y, here("Data", "Tidy", "plot_metD_imp_2y.rds"))
export(plot_mod0_imp_5y, here("Data", "Tidy", "plot_metD_imp_5y.rds"))
export(plot_cal_imp_mod0, here("Data", "Tidy", "plot_cal_imp_metD.rds"))


ggsave(filename = "Plot_Calibration_imputed_metD.png", 
       device = "png", 
       plot = plot_cal_mod0, 
       path = here("Figures"), 
       scale = 2, 
       width = 12, 
       height = 12,
       units = "cm", 
       dpi = 600)

ggsave(filename = "Plot_Calibration_imputed_estabilidad_metD.png", 
       device = "png", 
       plot = plot_cal_imp_mod0, 
       path = here("Figures"), 
       scale = 2, 
       width = 12, 
       height = 12,
       units = "cm", 
       dpi = 600)

gc()
           used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells  4086564 218.3    7207984 385.0   7207984  385.0
Vcells 11688070  89.2   42052652 320.9 187462211 1430.3
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_metD.png"))

Calibration curves for each group and prediction horizon
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_estabilidad_metD.png"))

Calibration curves for each group and prediction horizon

1.10 Comparacion de modelos

plot_cal_metA <- import(here("Data", "Tidy", "plot_cal_metA.rds"))
plot_cal_metB <- import(here("Data", "Tidy", "plot_cal_metB.rds"))
plot_cal_metC <- import(here("Data", "Tidy", "plot_cal_metC.rds"))
plot_cal_metD <- import(here("Data", "Tidy", "plot_cal_metD.rds"))
(plot_cal_metA | plot_cal_metB | plot_cal_metC | plot_cal_metD) + plot_annotation(tag_levels = "A") -> plot_comp_calib

ggsave(filename = "Plot_Compare_Methods.png", 
       plot = plot_comp_calib, 
       device = "png", 
       path = here("Figures"), 
       scale = 2.5, 
       width = 12, 
       height = 6, 
       units = "cm")

1.11 Ticket de Reprocubilidad

sessionInfo()
R version 4.3.3 (2024-02-29 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 22631)

Matrix products: default


locale:
[1] LC_COLLATE=English_United States.utf8 
[2] LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

time zone: America/Lima
tzcode source: internal

attached base packages:
 [1] parallel  grid      splines   stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] smplot2_0.1.0              impstep_0.1.0             
 [3] yardstick_1.3.1            workflowsets_1.1.0        
 [5] workflows_1.1.4            tune_1.2.0                
 [7] recipes_1.0.10             parsnip_1.2.1             
 [9] modeldata_1.3.0            infer_1.0.7               
[11] dials_1.2.1                tidymodels_1.2.0          
[13] rio_1.0.1                  tictoc_1.2.1              
[15] ggmice_0.1.0               furrr_0.3.1               
[17] future_1.33.2              ggExtra_0.10.1            
[19] gtools_3.9.5               DescTools_0.99.54         
[21] naniar_1.1.0               mice_3.16.0               
[23] PerformanceAnalytics_2.0.4 xts_0.13.2                
[25] zoo_1.8-12                 VIM_6.2.2                 
[27] colorspace_2.1-0           janitor_2.2.0             
[29] gt_0.10.1                  DiagrammeR_1.0.11         
[31] ggtext_0.1.2               htmltools_0.5.8           
[33] devtools_2.4.5             usethis_2.2.3             
[35] gghalves_0.1.4             xml2_1.3.6                
[37] downlit_0.4.3              broom_1.0.5               
[39] dcurves_0.4.0              glue_1.7.0                
[41] labelled_2.12.0            scales_1.3.0              
[43] cowplot_1.1.3              ggsci_3.0.3               
[45] patchwork_1.2.0            webshot_0.5.5             
[47] gridExtra_2.3              rsample_1.2.1             
[49] lubridate_1.9.3            forcats_1.0.0             
[51] stringr_1.5.1              dplyr_1.1.4               
[53] purrr_1.0.2                readr_2.1.5               
[55] tidyr_1.3.1                tibble_3.2.1              
[57] tidyverse_2.0.0            boot_1.3-30               
[59] gtsummary_1.7.2            flextable_0.9.5           
[61] kableExtra_1.4.0           knitr_1.45                
[63] plotrix_3.8-4              pec_2023.04.12            
[65] prodlim_2023.08.28         pseudo_1.4.3              
[67] geepack_1.3.10             KMsurv_0.1-5              
[69] mstate_0.3.2               riskRegression_2023.12.21 
[71] cmprsk_2.2-11              rms_6.8-0                 
[73] Hmisc_5.1-2                survminer_0.4.9           
[75] ggpubr_0.6.0               ggplot2_3.5.0             
[77] survival_3.5-8             skimr_2.1.5               
[79] here_1.0.1                

loaded via a namespace (and not attached):
  [1] R.methodsS3_1.8.2       gld_2.6.6               pacman_0.5.1           
  [4] urlchecker_1.0.1        nnet_7.3-19             TH.data_1.1-2          
  [7] vctrs_0.6.5             png_0.1-8               digest_0.6.35          
 [10] shape_1.4.6.1           proxy_0.4-27            Exact_3.2              
 [13] httpcode_0.3.0          parallelly_1.37.1       MASS_7.3-60.0.1        
 [16] fontLiberation_0.1.0    httpuv_1.6.15           foreach_1.5.2          
 [19] withr_3.0.0             xfun_0.43               ellipsis_0.3.2         
 [22] memoise_2.0.1           crul_1.4.0              MatrixModels_0.5-3     
 [25] profvis_0.3.8           systemfonts_1.0.6       ragg_1.3.0             
 [28] R.oo_1.26.0             DEoptimR_1.1-3          Formula_1.2-5          
 [31] promises_1.2.1          httr_1.4.7              rstatix_0.7.2          
 [34] globals_0.16.3          rstudioapi_0.16.0       pan_1.9                
 [37] miniUI_0.1.1.1          generics_0.1.3          base64enc_0.1-3        
 [40] curl_5.2.1              repr_1.1.7              quadprog_1.5-8         
 [43] xtable_1.8-4            evaluate_0.23           hms_1.1.3              
 [46] glmnet_4.1-8            visNetwork_2.1.2        readxl_1.4.3           
 [49] magrittr_2.0.3          lmtest_0.9-40           snakecase_0.11.1       
 [52] later_1.3.2             lattice_0.22-5          future.apply_1.11.2    
 [55] robustbase_0.99-2       SparseM_1.81            lhs_1.1.6              
 [58] class_7.3-22            pillar_1.9.0            nlme_3.1-164           
 [61] iterators_1.0.14        GPfit_1.0-8             compiler_4.3.3         
 [64] stringi_1.8.3           gower_1.0.1             jomo_2.7-6             
 [67] minqa_1.2.6             crayon_1.5.2            abind_1.4-5            
 [70] haven_2.5.4             sp_2.1-3                rootSolve_1.8.2.4      
 [73] sandwich_3.1-0          codetools_0.2-19        multcomp_1.4-25        
 [76] textshaping_0.3.7       openssl_2.1.1           e1071_1.7-14           
 [79] lmom_3.0                mime_0.12               Rcpp_1.0.12            
 [82] quantreg_5.97           DiceDesign_1.10         cellranger_1.1.0       
 [85] gridtext_0.1.5          utf8_1.2.4              lme4_1.1-35.2          
 [88] fs_1.6.3                listenv_0.9.1           checkmate_2.3.1        
 [91] pkgbuild_1.4.4          expm_0.999-9            ggsignif_0.6.4         
 [94] Matrix_1.6-5            tzdb_0.4.0              visdat_0.6.0           
 [97] svglite_2.1.3           pkgconfig_2.0.3         tools_4.3.3            
[100] cachem_1.0.8            viridisLite_0.4.2       numDeriv_2016.8-1.1    
[103] fastmap_1.1.1           rmarkdown_2.26          sdamr_0.2.0            
[106] mets_1.3.4              officer_0.6.5           carData_3.0-5          
[109] rpart_4.1.23            farver_2.1.1            yaml_2.3.8             
[112] foreign_0.8-86          cli_3.6.2               lifecycle_1.0.4        
[115] askpass_1.2.0           mvtnorm_1.2-4           lava_1.8.0             
[118] sessioninfo_1.2.2       backports_1.4.1         timechange_0.3.0       
[121] gtable_0.3.4            jsonlite_1.8.8          mitml_0.4-5            
[124] pwr_1.3-0               zip_2.3.1               ranger_0.16.0          
[127] highr_0.10              polspline_1.1.24        survMisc_0.5.6         
[130] R.utils_2.12.3          timeDate_4032.109       shiny_1.8.1            
[133] gfonts_0.2.0            timereg_2.0.5           broom.helpers_1.14.0   
[136] gdtools_0.3.7           rprojroot_2.0.4         R6_2.5.1               
[139] km.ci_0.5-6             vcd_1.4-12              cluster_2.1.6          
[142] pkgload_1.3.4           ipred_0.9-14            nloptr_2.0.3           
[145] tidyselect_1.2.1        htmlTable_2.4.2         fontBitstreamVera_0.1.1
[148] car_3.1-2               munsell_0.5.0           laeken_0.5.3           
[151] fontquiver_0.2.1        data.table_1.15.4       htmlwidgets_1.6.4      
[154] RColorBrewer_1.1-3      rlang_1.1.3             uuid_1.2-0             
[157] remotes_2.5.0           fansi_1.0.6             hardhat_1.3.1